%matplotlib inline
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm
from scipy import stats
from scipy.stats import entropy
from scipy.optimize import minimize
import pandas as pd

Code 1: Bayesian Inference

This is a reference notebook for the book Bayesian Modeling and Computation in Python. The textbook is not needed to use or run this code, though the context and explanations are missing from this notebook.
If you'd like a copy, it's available from CRC Press or from Amazon.
**The code in this notebook has been revised to work with PyMC 4.0**
az.style.use("arviz-grayscale")
plt.rcParams['figure.dpi'] = 300
np.random.seed(521)
viridish = [(0.2823529411764706, 0.11372549019607843, 0.43529411764705883, 1.0),
(0.1450980392156863, 0.6705882352941176, 0.5098039215686274, 1.0),
            (0.6901960784313725, 0.8666666666666667, 0.1843137254901961, 1.0)]

A DIY Sampler, Do Not Try This at Home
Figure 1.1
grid = np.linspace(0, 1, 5000)
prior = stats.triang.pdf(grid, 0.5)
likelihood = stats.triang.pdf(0.2, grid)
posterior = prior * likelihood
log_prior = np.log(prior)
log_likelihood = np.log(likelihood)
log_posterior = log_prior + log_likelihood
_, ax = plt.subplots(1, 2, figsize=(10, 4))
ax[0].plot(grid, prior, label="prior", lw=2)
ax[0].plot(grid, likelihood, label="likelihood", lw=2, color="C2")
ax[0].plot(grid, posterior, label="posterior", lw=2, color="C4")
ax[0].set_xlabel("θ")
ax[0].legend()
ax[0].set_yticks([])
ax[1].plot(grid, log_prior, label="log-prior", lw=2)
ax[1].plot(grid, log_likelihood, label="log-likelihood", lw=2, color="C2")
ax[1].plot(grid, log_posterior, label="log-posterior", lw=2, color="C4")
ax[1].set_xlabel("θ")
ax[1].legend()
ax[1].set_yticks([])
# plt.savefig("chp01_bayesian_triad.png")/var/folders/gg/0m4l7pvs5zq9d8p4tzflypdc0000gn/T/ipykernel_14783/583882300.py:5: RuntimeWarning: divide by zero encountered in log
log_prior = np.log(prior)
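As a quick check on the grid approximation (not part of the book's code), the unnormalized posterior can be normalized numerically and used to compute, for example, the posterior mean:

dx = grid[1] - grid[0]
post_density = posterior / (posterior.sum() * dx)  # normalize so it integrates to 1
post_mean = (grid * post_density * dx).sum()
post_mean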
Code 1.1
def post(θ, Y, α=1, β=1):
    if 0 <= θ <= 1:
        prior = stats.beta(α, β).pdf(θ)
        like = stats.bernoulli(θ).pmf(Y).prod()
        prop = like * prior
    else:
        # proposals outside [0, 1] get -inf so they are always rejected
        prop = -np.inf
    return prop
Code 1.2

Y = stats.bernoulli(0.7).rvs(20)
Y

array([0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1])
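A quick sanity check of post (not in the book's code): evaluate the unnormalized posterior at a few candidate values, including one outside [0, 1], which returns -inf and would always be rejected by the sampler below.

for θ_try in [0.1, 0.5, 0.7, 0.9, 1.1]:
    print(f"post({θ_try}, Y) = {post(θ_try, Y):.3e}")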
Code 1.3
n_iters = 1000
can_sd = 0.05
α = β = 1
θ = 0.5
trace = {'θ': np.zeros(n_iters)}
p2 = post(θ, Y, α, β)

for iter in range(n_iters):
    # .rvs() returns a scalar, avoiding NumPy's array-to-scalar deprecation warning
    θ_can = stats.norm(θ, can_sd).rvs()
    p1 = post(θ_can, Y, α, β)
    pa = p1 / p2
    if pa > stats.uniform(0, 1).rvs():
        θ = θ_can
        p2 = p1
    trace['θ'][iter] = θ
trace.keys()

dict_keys(['θ'])
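A couple of quick diagnostics on the DIY sampler (a sketch, not in the book's code): the fraction of iterations where θ changed approximates the acceptance rate, and discarding some initial draws as burn-in is common practice (the 100-draw cutoff here is arbitrary).

accept_rate = np.mean(np.diff(trace['θ']) != 0)
print(f"acceptance rate ≈ {accept_rate:.2f}")
trace_burned = {'θ': trace['θ'][100:]}  # drop the first 100 draws as burn-in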
Code 1.5
az.summary(trace, kind='stats', round_to=2)

|   | mean | sd  | hdi_3% | hdi_97% |
|---|------|-----|--------|---------|
| θ | 0.69 | 0.1 | 0.51   | 0.87    |
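Because ArviZ accepts the plain dict, the same trace can also be checked for effective sample size, a rough gauge of how autocorrelated the Metropolis draws are (not in the book's code):

az.ess(trace)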
Code 1.4 and Figure 1.2
_, axes = plt.subplots(1,2, figsize=(10, 4), constrained_layout=True, sharey=True)
axes[1].hist(trace['θ'], color='0.5', orientation="horizontal", density=True)
axes[1].set_xticks([])
axes[0].plot(trace['θ'], '0.5')
axes[0].set_ylabel('θ', rotation=0, labelpad=15)
# plt.savefig("chp01_traceplot.png")Text(0, 0.5, 'θ')
Say Yes to Automating Inference, Say No to Automated Model Building
Figure 1.3
az.plot_posterior(trace)
# plt.savefig("chp01_plot_posterior.png")Code 1.6
# Declare a model in PyMC
with pm.Model() as model:
    # Specify the prior distribution of the unknown parameter
    θ = pm.Beta("θ", alpha=1, beta=1)

    # Specify the likelihood distribution and condition on the observed data
    y_obs = pm.Binomial("y_obs", n=1, p=θ, observed=Y)

    # Sample from the posterior distribution
    idata = pm.sample(1000, return_inferencedata=True)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [θ]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 0 seconds.
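The Beta prior is conjugate to the Bernoulli likelihood, so this posterior is also available in closed form as Beta(α + Σy, β + n − Σy); comparing it against the MCMC estimate is a useful sanity check (a sketch, not in the book's code):

α_post = 1 + Y.sum()
β_post = 1 + len(Y) - Y.sum()
print(f"analytic posterior mean: {α_post / (α_post + β_post):.3f}")
print(f"MCMC posterior mean:     {float(idata.posterior['θ'].mean()):.3f}")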
Code 1.7
graphviz = pm.model_to_graphviz(model)
graphviz
# graphviz.graph_attr.update(dpi="300")
# graphviz.render("chp01_BetaBinomModelGraphViz", format="png")

A Few Options to Quantify Your Prior Information
Figure 1.5
pred_dists = (pm.sample_prior_predictive(samples=1000, model=model, var_names=["y_obs"]),
              pm.sample_posterior_predictive(idata, model=model, var_names=["y_obs"]))

Sampling: [y_obs, θ]
Sampling: [y_obs]

prior_pred = pred_dists[0]['prior_predictive']['y_obs'][0].values.sum(axis=1)
post_pred = pred_dists[1]['posterior_predictive']['y_obs'][0].values.sum(axis=1)
prior_dist = pm.draw(pm.Beta.dist(alpha=1, beta=1), draws=10000)

fig, axes = plt.subplots(4, 1, figsize=(9, 9))
az.plot_dist(prior_pred, hist_kwargs={"color":"0.5", "bins":range(0, 22)}, ax=axes[1])
axes[1].set_title("Prior predictive distribution")
axes[1].set_xlim(-1, 21)
axes[1].set_ylim(0, 0.15)
axes[1].set_xlabel("number of succes")
az.plot_dist(post_pred, hist_kwargs={"color":"0.5", "bins":range(0, 22)}, ax=axes[3])
axes[3].set_title("Posterior predictive distribution")
axes[3].set_xlim(-1, 21)
axes[3].set_ylim(0, 0.15)
axes[3].set_xlabel("number of succes")
az.plot_dist(prior_dist, plot_kwargs={"color":"0.5"},
fill_kwargs={'alpha':1}, ax=axes[0])
axes[0].set_title("Prior distribution", fontweight='bold')
axes[0].set_xlim(0, 1)
axes[0].set_ylim(0, 4)
axes[0].tick_params(axis='both', pad=7)
axes[0].set_xlabel("θ")
az.plot_dist(idata.posterior["θ"], plot_kwargs={"color":"0.5"},
fill_kwargs={'alpha':1}, ax=axes[2])
axes[2].set_title("Posterior distribution", fontweight='bold')
axes[2].set_xlim(0, 1)
axes[2].set_ylim(0, 5)
axes[2].tick_params(axis='both', pad=7)
axes[2].set_xlabel("θ")
# plt.savefig("chp01_Bayesian_quartet_distributions.png")Text(0.5, 0, 'θ')
Figure 1.6
def gen_post_mean(trials):
    p_mean = idata.posterior["θ"].mean().values
    post_mean = []
    for i in range(trials):
        trial = pm.draw(pm.Binomial.dist(n=1, p=p_mean), draws=20).sum()
        post_mean.append(trial)
    return np.array(post_mean)

post_mean = gen_post_mean(1000)

ax = az.plot_dist(post_mean, label="posterior mean", figsize=(10,5),
                  hist_kwargs={"alpha": 0.5, "color":"C0", "bins":range(0, 22)})
ax = az.plot_dist(post_pred, label="posterior predictive", figsize=(10,5),
                  hist_kwargs={"alpha": 0.5, "color":"C4", "bins":range(0, 22)})
# plt.savefig("chp0_predictions_distributions.png")
Code 1.8 and Figure 1.7

_, axes = plt.subplots(2, 3, figsize=(12, 6), sharey=True, sharex=True,
                       constrained_layout=True)
axes = np.ravel(axes)
n_trials = [0, 1, 2, 3, 12, 180]
success = [0, 1, 1, 1, 6, 59]
data = zip(n_trials, success)
beta_params = [(0.5, 0.5), (1, 1), (10, 10)]
θ = np.linspace(0, 1, 1500)
for idx, (N, y) in enumerate(data):
    s_n = ('s' if (N > 1) else '')
    for jdx, (a_prior, b_prior) in enumerate(beta_params):
        p_theta_given_y = stats.beta.pdf(θ, a_prior + y, b_prior + N - y)
        axes[idx].plot(θ, p_theta_given_y, lw=4, color=viridish[jdx])
    axes[idx].set_yticks([])
    axes[idx].set_ylim(0, 12)
    axes[idx].plot(np.divide(y, N), 0, color='k', marker='o', ms=12)
    axes[idx].set_title(f'{N:4d} trial{s_n} {y:4d} success')
# plt.savefig('chp01_beta_binomial_update.png')
# Note: the 0-trial panel computes 0/0 (nan), so no data point is drawn there and
# NumPy emits a harmless "invalid value encountered in divide" warning.
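With enough data the three priors barely matter. For the last panel the conjugate posterior is Beta(a + y, b + N − y), so its mean is available in closed form (a sketch, not in the book's code):

N, y = 180, 59
for a_prior, b_prior in beta_params:
    print(f"Beta({a_prior}, {b_prior}) prior -> posterior mean {(a_prior + y) / (a_prior + b_prior + N):.3f}")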
Figure 1.8
θ = np.linspace(0, 1, 100)
κ = (θ / (1-θ))
y = 2
n = 7
_, axes = plt.subplots(2, 2, figsize=(10, 5),
sharex='col', sharey='row', constrained_layout=False)
axes[0, 0].set_title("Jeffreys' prior for Alice")
axes[0, 0].plot(θ, θ**(-0.5) * (1-θ)**(-0.5))
axes[1, 0].set_title("Jeffreys' posterior for Alice")
axes[1, 0].plot(θ, θ**(y-0.5) * (1-θ)**(n-y-0.5))
axes[1, 0].set_xlabel("θ")
axes[0, 1].set_title("Jeffreys' prior for Bob")
axes[0, 1].plot(κ, κ**(-0.5) * (1 + κ)**(-1))
axes[1, 1].set_title("Jeffreys' posterior for Bob")
axes[1, 1].plot(κ, κ**(y-0.5) * (1 + κ)**(-n-1))
axes[1, 1].set_xlim(-0.5, 10)
axes[1, 1].set_xlabel("κ")
axes[1, 1].text(-4.0, 0.030, size=18, s=r'$p(\theta \mid Y) \, \frac{d\theta}{d\kappa}$')
axes[1, 1].annotate("", xy=(-0.5, 0.025), xytext=(-4.5, 0.025),
arrowprops=dict(facecolor='black', shrink=0.05))
axes[1, 1].text(-4.0, 0.007, size=18, s= r'$p(\kappa \mid Y) \, \frac{d\kappa}{d\theta}$')
axes[1, 1].annotate("", xy=(-4.5, 0.015), xytext=(-0.5, 0.015),
arrowprops=dict(facecolor='black', shrink=0.05),
annotation_clip=False)
plt.subplots_adjust(wspace=0.4, hspace=0.4)
plt.tight_layout()
# plt.savefig("chp01_Jeffrey_priors.png")/var/folders/gg/0m4l7pvs5zq9d8p4tzflypdc0000gn/T/ipykernel_14783/445359648.py:2: RuntimeWarning: divide by zero encountered in divide
κ = (θ / (1-θ))
/var/folders/gg/0m4l7pvs5zq9d8p4tzflypdc0000gn/T/ipykernel_14783/445359648.py:10: RuntimeWarning: divide by zero encountered in power
axes[0, 0].plot(θ, θ**(-0.5) * (1-θ)**(-0.5))
/var/folders/gg/0m4l7pvs5zq9d8p4tzflypdc0000gn/T/ipykernel_14783/445359648.py:15: RuntimeWarning: divide by zero encountered in power
axes[0, 1].plot(κ, κ**(-0.5) * (1 + κ)**(-1))
/var/folders/gg/0m4l7pvs5zq9d8p4tzflypdc0000gn/T/ipykernel_14783/445359648.py:17: RuntimeWarning: invalid value encountered in multiply
axes[1, 1].plot(κ, κ**(y-0.5) * (1 + κ)**(-n-1))
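The two posteriors in Figure 1.8 are the same distribution expressed in different parametrizations. A numerical check (not in the book's code) that Alice's posterior times the Jacobian dθ/dκ reproduces Bob's, using θ = κ/(1+κ):

κ_grid = np.linspace(0.01, 10, 500)
θ_of_κ = κ_grid / (1 + κ_grid)
jacobian = 1 / (1 + κ_grid)**2  # dθ/dκ
post_θ = θ_of_κ**(y-0.5) * (1 - θ_of_κ)**(n-y-0.5)
post_κ = κ_grid**(y-0.5) * (1 + κ_grid)**(-n-1)
np.allclose(post_θ * jacobian, post_κ)  # True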
Figure 1.9
cons = [[{"type": "eq", "fun": lambda x: np.sum(x) - 1}],
[{"type": "eq", "fun": lambda x: np.sum(x) - 1},
{"type": "eq", "fun": lambda x: 1.5 - np.sum(x * np.arange(1, 7))}],
[{"type": "eq", "fun": lambda x: np.sum(x) - 1},
{"type": "eq", "fun": lambda x: np.sum(x[[2, 3]]) - 0.8}]]
max_ent = []
for i, c in enumerate(cons):
    val = minimize(lambda x: -entropy(x), x0=[1/6]*6, bounds=[(0., 1.)] * 6,
                   constraints=c)['x']
    max_ent.append(entropy(val))
    plt.plot(np.arange(1, 7), val, 'o--', color=viridish[i], lw=2.5)
plt.xlabel("$t$")
plt.ylabel("$p(t)$")
# plt.savefig("chp01_max_entropy.png")Text(0, 0.5, '$p(t)$')
Code 1.10
ite = 100_000
entropies = np.zeros((3, ite))
for idx in range(ite):
    rnds = np.zeros(6)
    total = 0
    x_ = np.random.choice(np.arange(1, 7), size=6, replace=False)
    for i in x_[:-1]:
        rnd = np.random.uniform(0, 1-total)
        rnds[i-1] = rnd
        total = rnds.sum()
    rnds[-1] = 1 - rnds[:-1].sum()
    H = entropy(rnds)
    entropies[0, idx] = H
    if abs(1.5 - np.sum(rnds * x_)) < 0.01:
        entropies[1, idx] = H
    prob_34 = sum(rnds[np.argwhere((x_ == 3) | (x_ == 4)).ravel()])
    if abs(0.8 - prob_34) < 0.01:
        entropies[2, idx] = H
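Rows 1 and 2 of entropies stay at zero unless a random distribution happens to satisfy the corresponding constraint, so counting the nonzero entries shows how rarely that occurs (a sketch, not in the book's code):

n_kept = (entropies != 0).sum(axis=1)
print(f"unconstrained draws:          {n_kept[0]}")
print(f"mean ≈ 1.5 satisfied:         {n_kept[1]}")
print(f"P(3) + P(4) ≈ 0.8 satisfied:  {n_kept[2]}")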
Figure 1.10

_, ax = plt.subplots(1, 3, figsize=(12,4), sharex=True, sharey=True, constrained_layout=True)
for i in range(3):
    az.plot_kde(entropies[i][np.nonzero(entropies[i])], ax=ax[i], plot_kwargs={"color":viridish[i], "lw":4})
    ax[i].axvline(max_ent[i], 0, 1, ls="--")
    ax[i].set_yticks([])
    ax[i].set_xlabel("entropy")
# plt.savefig("chp01_max_entropy_vs_random_dist.png")Figure 1.11
x = np.linspace(0, 1, 500)
params = [(0.5, 0.5), (1, 1), (3,3), (100, 25)]
labels = ["Jeffreys", "MaxEnt", "Weakly Informative",
"Informative"]
_, ax = plt.subplots()
for (α, β), label, c in zip(params, labels, (0, 1, 4, 2)):
    pdf = stats.beta.pdf(x, α, β)
    ax.plot(x, pdf, label=f"{label}", c=f"C{c}", lw=3)
ax.set(yticks=[], xlabel="θ", title="Priors")
ax.legend()
# plt.savefig("chp01_prior_informativeness_spectrum.png")